# Run-wide settings: run labelling, logging, resume, precision and gradient
# clipping.
# NOTE(review): the per-key notes below are inferred from the key names only;
# confirm against the code that consumes this config.
common:
  run_label: "run_1"
  # presumably a logging interval; the unit is not visible here — TODO confirm
  log_freq: 500
  auto_resume: true
  mixed_precision: true
  channels_last: true
  tensorboard_logging: false
  # presumably a gradient-clipping threshold; whether it is applied by norm
  # or by value is not visible here — TODO confirm
  grad_clip: 10.0
# Dataset locations and data-loading settings for the "imagenet"
# classification dataset.
dataset:
  # Machine-specific absolute paths; adjust to the local dataset location.
  root_train: "/media/Datasets/ILSVRC2012-raw/train"
  root_val: "/media/Datasets/ILSVRC2012-raw/val"
  name: "imagenet"
  category: "classification"
  # The batch-size keys end in "0" exactly as spelled here; keep the names
  # verbatim. NOTE(review): whether these sizes are per device or global is
  # not visible in this file — confirm against the consumer.
  train_batch_size0: 85
  val_batch_size0: 85
  eval_batch_size0: 1
  # presumably data-loader worker/prefetch options — TODO confirm
  workers: 6
  prefetch_factor: 2
  persistent_workers: false
  pin_memory: true
# Image transforms. Every transform listed here is switched on via
# `enable: true`.
# NOTE(review): which transforms apply to training and which to
# validation/evaluation is not visible in this file — confirm against the
# consumer.
image_augmentation:
  random_resized_crop:
    enable: true
    interpolation: "bicubic"
  random_horizontal_flip:
    enable: true
  rand_augment:
    enable: true
  random_erase:
    enable: true
    # presumably the probability of applying the transform — TODO confirm
    p: 0.25
  mixup:
    enable: true
    alpha: 0.2
  cutmix:
    enable: true
    alpha: 1.0
  resize:
    enable: true
    size: 288 # presumably the shorter image side is resized to 288 — TODO confirm
    interpolation: "bicubic"
  center_crop:
    enable: true
    # Same value as sampler.bs.crop_size_width / crop_size_height (256).
    size: 256
# Batch sampler selection and its crop size.
sampler:
  name: "batch_sampler"
  # Options for the sampler named above.
  # NOTE(review): "bs" presumably abbreviates "batch_sampler" — confirm.
  bs:
    # 256x256, the same value as image_augmentation.center_crop.size.
    crop_size_width: 256
    crop_size_height: 256
# Loss selection: cross-entropy with label smoothing for the classification
# task.
loss:
  category: "classification"
  # Options for the category selected above.
  classification:
    name: "cross_entropy"
    label_smoothing: 0.1
# Optimizer selection (AdamW) and its hyper-parameters.
optim:
  name: "adamw"
  weight_decay: 0.05
  # presumably exempts batch-norm and bias parameters from weight decay;
  # inferred from the key name — TODO confirm
  no_decay_bn_filter_bias: true
  # Options for the optimizer named above.
  adamw:
    beta1: 0.9
    beta2: 0.999
# Learning-rate schedule: cosine, with a warm-up phase.
scheduler:
  name: "cosine"
  # Not iteration-based: the run length is given in epochs (max_epochs),
  # while the warm-up length below is still given in iterations.
  is_iteration_based: false
  max_epochs: 300
  warmup_iterations: 20000
  # Keep the decimal point in the mantissa: some YAML 1.1 loaders
  # (e.g. PyYAML) load exponent-only forms such as 1e-6 as strings.
  warmup_init_lr: 1.0e-6
  # Options for the scheduler named above; min_lr is one tenth of max_lr.
  cosine:
    max_lr: 0.002
    min_lr: 0.0002
# Model definition: MobileViT-v3 classifier plus shared layer defaults
# (normalization, activation, pooling, weight initialisation).
model:
  classification:
    name: "mobilevit_v3"
    # Options for the classifier named above.
    mitv3:
      width_multiplier: 1.0
      attn_norm_layer: "layer_norm_2d"
    # "swish" is set both here and under model.activation below.
    # NOTE(review): whether one overrides the other is not visible in this
    # file — keep the two in sync unless the consumer says otherwise.
    activation:
      name: "swish"
  normalization:
    name: "batch_norm"
    momentum: 0.1
  activation:
    name: "swish"
  layer:
    global_pool: "mean"
    conv_init: "kaiming_normal"
    # NOTE(review): whether this std-dev is used by "kaiming_normal" is not
    # visible here — confirm against the consumer.
    conv_init_std_dev: 0.02
    linear_init: "trunc_normal"
    linear_init_std_dev: 0.02
# EMA settings; "ema" presumably stands for an exponential moving average of
# the model weights — TODO confirm.
ema:
  enable: true
  # NOTE(review): how this momentum value enters the EMA update is defined
  # by the consumer and is not visible here — confirm before changing it.
  momentum: 0.0005
# Distributed (DDP) training settings.
ddp:
  disable: false # false if ddp is used
  # Placeholder value: replace the X's with the real address and port before
  # launching.
  dist_url: "tcp://XX.XXX.XXX.XXX:XXXX" # ip address of server with rank 0
  rank: 0   # unique rank for each server
  # NOTE(review): whether world_size counts servers or processes is not
  # visible in this file — confirm against the consumer.
  world_size: 6
  # NOTE(review): how dist_port relates to the port embedded in dist_url is
  # not visible in this file — confirm against the consumer.
  dist_port: 30768
# Metrics tracked per phase, and the metric used for checkpointing.
stats:
  val:
    - "loss"
    - "top1"
    - "top5"
  train:
    - "loss"
  # "top1" is one of the validation metrics listed above.
  checkpoint_metric: "top1"
  # presumably means a higher metric value is better — TODO confirm
  checkpoint_metric_max: true
